In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
## Load the three upstream series (dissolved oxygen, water level, water temperature).
## index_col makes LocalTimestamp the row index of each frame, and
## parse_dates has pandas parse that column as date/time values.
up_DO = pd.read_csv(
    'Data/Clean/upstream/upstream-DO_mgL.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
up_level = pd.read_csv(
    'Data/Clean/upstream/upstream-level_ft.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
up_wtemp = pd.read_csv(
    'Data/Clean/upstream/upstream-wtemp_C.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
In [3]:
## give the value column of each upstream frame a short name
## (a one-element list only fits a frame with exactly one non-index column)
up_DO.columns, up_level.columns, up_wtemp.columns = ['DO'], ['Level'], ['WaterTemp']
In [4]:
## Load the three transitional series, indexed by their parsed LocalTimestamp column.
tr_DO = pd.read_csv(
    'Data/Clean/transitional/transitional-DO_mgL.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
tr_level = pd.read_csv(
    'Data/Clean/transitional/transitional-level_ft.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
tr_wtemp = pd.read_csv(
    'Data/Clean/transitional/transitional-wtemp_C.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
In [5]:
## give the value column of each transitional frame a short name
## (a one-element list only fits a frame with exactly one non-index column)
tr_DO.columns, tr_level.columns, tr_wtemp.columns = ['DO'], ['Level'], ['WaterTemp']
In [6]:
## Load the three estuarine series, indexed by their parsed LocalTimestamp column.
es_DO = pd.read_csv(
    'Data/Clean/estuarine/estuarine-DO_mgL.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
es_level = pd.read_csv(
    'Data/Clean/estuarine/estuarine-level_ft.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
es_wtemp = pd.read_csv(
    'Data/Clean/estuarine/estuarine-wtemp_C.csv',
    index_col='LocalTimestamp',
    parse_dates=['LocalTimestamp'],
)
In [7]:
## give the value column of each estuarine frame a short name
## (a one-element list only fits a frame with exactly one non-index column)
es_DO.columns, es_level.columns, es_wtemp.columns = ['DO'], ['Level'], ['WaterTemp']
In [8]:
## preview the first three rows of the upstream DO data
up_DO.head(3)
Out[8]:
In [9]:
## preview the first three rows of the upstream water level data
up_level.head(3)
Out[9]:
In [10]:
## combine the three upstream series into one frame: starting from the level
## data, DO and then water temperature are joined on the LocalTimestamp index
upstream = up_level.join(up_DO).join(up_wtemp)
In [11]:
## preview the first ten rows of the merged "upstream" data frame
upstream.head(10)
Out[11]:
In [12]:
## combine the transitional series the same way: level, then DO, then water temperature
transitional = tr_level.join(tr_DO).join(tr_wtemp)
transitional.head(5)
Out[12]:
In [13]:
## combine the estuarine series the same way: level, then DO, then water temperature
estuarine = es_level.join(es_DO).join(es_wtemp)
estuarine.head(5)
Out[13]:
In [14]:
## Combine the three station frames into one wide frame on the timestamp index.
## Each frame's columns are tagged with a station suffix *before* joining
## (_up, _tr, _es), so no names collide and nothing has to be renamed afterwards.
## This replaces writing into `lsjr.columns.values`: an Index is meant to be
## immutable, and mutating its backing array is unsupported (it can silently
## have no effect or leave cached column lookups stale).
lsjr = (
    upstream.add_suffix('_up')
    .join(transitional.add_suffix('_tr'))
    .join(estuarine.add_suffix('_es'))
)
lsjr[:5]
Out[14]:
In [15]:
## build a DO-only frame from the upstream, transitional and estuarine DO columns of lsjr
df = pd.concat([lsjr[name] for name in ('DO_up', 'DO_tr', 'DO_es')], axis=1)
## all three DO series on one set of axes
df.plot(marker='.')
## then one figure per station, drawn from the individual DO frames
up_DO.plot(title='Upstream', marker='.')
tr_DO.plot(title='Transitional', marker='.')
es_DO.plot(title='Estuarine', marker='.')
Out[15]:
In [16]:
## use a larger default figure size (16 x 12) for the figures that follow
plt.rc('figure', figsize=(16, 12))
In [17]:
## July subset: keep the rows whose timestamp month is 7, then plot.
## dropna() removes rows that contain NA values, since these would
## interfere with the plot.
dfjul = df.loc[df.index.month == 7]
dfjul.dropna().plot(title='July', marker='.')
## January subset: same selection for month 1
dfjan = df.loc[df.index.month == 1]
dfjan.dropna().plot(title='January', marker='.')
Out[17]:
In [18]:
## box plots of the July and January DO subsets (one box per station column)
dfjul.plot(kind='box')
dfjan.plot(kind='box')
Out[18]:
In [19]:
## use pd.concat to create a new dataframe composed of the three water-level columns of lsjr
df_lv = pd.concat([lsjr['Level_up'],lsjr['Level_tr'],lsjr['Level_es']],axis=1)
## create July subset and plot
## (the month mask is built from df_lv's own index; it previously came from the
## DO frame `df`, which only worked because both frames share lsjr's index)
df_lv_jul = df_lv[(df_lv.index.month == 7)]
df_lv_jul.dropna().plot(marker='.',title='July water level')
## create January subset and plot
df_lv_jan = df_lv[(df_lv.index.month == 1)]
df_lv_jan.dropna().plot(marker='.',title='January water level')
Out[19]:
In [20]:
## build a water-temperature-only frame from the three station columns of lsjr
df_wt = pd.concat(
    [lsjr[name] for name in ('WaterTemp_up', 'WaterTemp_tr', 'WaterTemp_es')],
    axis=1,
)
## July subset (timestamp month 7), rows with NA dropped before plotting
df_wt_jul = df_wt.loc[df_wt.index.month == 7]
df_wt_jul.dropna().plot(title='July water temperature', marker='.')
## January subset (timestamp month 1), rows with NA dropped before plotting
df_wt_jan = df_wt.loc[df_wt.index.month == 1]
df_wt_jan.dropna().plot(title='January water temperature', marker='.')
Out[20]:
In [21]:
## full upstream record (rows with any NA dropped before plotting)
upstream.dropna().plot(title='Upstream', marker='.')
## July subset: rows whose timestamp month is 7
upstream_jul = upstream.loc[upstream.index.month == 7]
upstream_jul.dropna().plot(title='Upstream - July', marker='.')
## January subset: rows whose timestamp month is 1
upstream_jan = upstream.loc[upstream.index.month == 1]
upstream_jan.dropna().plot(title='Upstream - January', marker='.')
Out[21]:
In [22]:
## full transitional record (rows with any NA dropped before plotting)
transitional.dropna().plot(title='Transitional', marker='.')
## July subset: rows whose timestamp month is 7
transitional_jul = transitional.loc[transitional.index.month == 7]
transitional_jul.dropna().plot(title='Transitional - July', marker='.')
## January subset: rows whose timestamp month is 1
transitional_jan = transitional.loc[transitional.index.month == 1]
transitional_jan.dropna().plot(title='Transitional - January', marker='.')
Out[22]:
In [23]:
## full estuarine record (rows with any NA dropped before plotting)
estuarine.dropna().plot(title='Estuarine', marker='.')
## July subset: rows whose timestamp month is 7
estuarine_jul = estuarine.loc[estuarine.index.month == 7]
estuarine_jul.dropna().plot(title='Estuarine - July', marker='.')
## January subset: rows whose timestamp month is 1
estuarine_jan = estuarine.loc[estuarine.index.month == 1]
estuarine_jan.dropna().plot(title='Estuarine - January', marker='.')
Out[23]:
In [24]:
## pairwise Pearson correlation between the upstream Level, DO and WaterTemp columns
upstream.corr(method='pearson')
Out[24]:
In [25]:
## pairwise Pearson correlation between the transitional Level, DO and WaterTemp columns
transitional.corr(method='pearson')
Out[25]:
In [26]:
## pairwise Pearson correlation between the estuarine Level, DO and WaterTemp columns
estuarine.corr(method='pearson')
Out[26]:
In [27]:
## `lag` is the number of rows the second series is shifted by in the lagged
## correlations that follow (it is passed to Series.shift).
## A lag of 1 row = 1 hour for upstream and estuarine, 15 minutes for transitional
## (remember the temporal resolution of the different datasets)
lag = 1
In [28]:
## Lagged correlation of July upstream DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
upstream_jul['DO'].corr(upstream_jul['WaterTemp'].shift(lag))
Out[28]:
In [29]:
## Lagged correlation of July upstream DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
upstream_jul['DO'].corr(upstream_jul['Level'].shift(lag))
Out[29]:
In [30]:
## Lagged correlation of January upstream DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
upstream_jan['DO'].corr(upstream_jan['WaterTemp'].shift(lag))
Out[30]:
In [31]:
## Lagged correlation of January upstream DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
upstream_jan['DO'].corr(upstream_jan['Level'].shift(lag))
Out[31]:
In [32]:
## lag used for the transitional correlations below; presumably 4 rows x 15 minutes
## = a 1-hour lag, matching the other stations — TODO confirm the 15-minute spacing
lag = 4
In [33]:
## Lagged correlation of July transitional DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
transitional_jul['DO'].corr(transitional_jul['WaterTemp'].shift(lag))
Out[33]:
In [34]:
## Lagged correlation of July transitional DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
transitional_jul['DO'].corr(transitional_jul['Level'].shift(lag))
Out[34]:
In [35]:
## Lagged correlation of January transitional DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
transitional_jan['DO'].corr(transitional_jan['WaterTemp'].shift(lag))
Out[35]:
In [36]:
## Lagged correlation of January transitional DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
transitional_jan['DO'].corr(transitional_jan['Level'].shift(lag))
Out[36]:
In [37]:
## reset to a 1-row lag for the estuarine correlations below
lag = 1
In [38]:
## Lagged correlation of July estuarine DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
estuarine_jul['DO'].corr(estuarine_jul['WaterTemp'].shift(lag))
Out[38]:
In [39]:
## Lagged correlation of July estuarine DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
estuarine_jul['DO'].corr(estuarine_jul['Level'].shift(lag))
Out[39]:
In [40]:
## Lagged correlation of January estuarine DO against WaterTemp from `lag` rows earlier.
## WaterTemp is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
estuarine_jan['DO'].corr(estuarine_jan['WaterTemp'].shift(lag))
Out[40]:
In [41]:
## Lagged correlation of January estuarine DO against Level from `lag` rows earlier.
## Level is shifted *before* any NaNs are removed, so a gap in the data cannot
## stretch the lag; Series.corr aligns on the index and skips NaN pairs itself.
## NOTE(review): shift() counts rows, so this assumes evenly spaced timestamps.
estuarine_jan['DO'].corr(estuarine_jan['Level'].shift(lag))
Out[41]: